# Import libraries
import pandas as pd
import numpy as np

# Load the data set and separate it into a feature matrix and a target vector.
dataset = pd.read_csv('../../datasets/Day3.csv')
# Features: every column except the last one.
X = dataset.iloc[:, :-1].values
# Target: the column at position 4.
# NOTE(review): presumably this is the last column of the file -- confirm.
Y = dataset.iloc[:, 4].values

# Encode the categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Replace the values of column 3 with integer labels.
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])
# `OneHotEncoder(categorical_features=[3])` was removed in scikit-learn 0.22.
# ColumnTransformer one-hot encodes column 3 and passes the remaining columns
# through unchanged; the one-hot columns come first in the output, exactly as
# with the old `categorical_features` argument. `sparse_threshold=0` forces a
# dense result, which replaces the former `.toarray()` call.
onehotencoder = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(categories='auto'), [3])],
    remainder='passthrough',
    sparse_threshold=0,
)
# X is an object array at this point; cast the result to float64 so it
# matches what `.toarray()` used to return.
X = np.asarray(onehotencoder.fit_transform(X), dtype=np.float64)

# Avoid the dummy variable trap: drop the first one-hot column
X = X[:, 1:]

# Split the data set into a training set and a test set (20% held out for testing)
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,  # fixed seed so the split is reproducible
)

# Fit a multiple linear regression model to the training set
from sklearn.linear_model import LinearRegression
# `fit` returns the estimator itself, so construction and fitting can be chained.
regressor = LinearRegression().fit(X_train, Y_train)

# Predict the targets of the test set
y_pred = regressor.predict(X_test)

# Evaluate the regression: print the R^2 score of the predictions
from sklearn.metrics import r2_score
print(r2_score(y_true=Y_test, y_pred=y_pred))
